* Session setup: pin the Stata version, reset memory and macros, fix the seed,
* record provenance metadata, and open a fresh text log for this run.
version 18.0               // interpret every command below under Stata 18.0 rules
set processors 8           // fix the core count so results replicate across machines
clear all                  // clear data and other objects from memory
macro drop _all            // drop all macros for a clean slate (must come before the locals below)
set seed 20220909          // fix the random-number seed

local pgm  "dst-Figure_6_Panel_B_mental_distress_binscatter"         // file stem shared by the log and figure files
local who  "Muzhe Yang"                                              // author
local dte  "2025-01-20"                                              // created date
local dte2 "`c(current_date)'"                                       // last run date
local tag  "`pgm'.do, created by `who' on `dte', last run on `dte2'"

capture log close          // close a log left open by an earlier run, if any; ignore the error otherwise

* Forward slashes are accepted as directory separators by Stata on every operating
* system. They also avoid the backslash-before-macro pitfall: a single backslash
* placed directly in front of a macro reference suppresses its expansion.
log using "code/analysis/figures/`pgm'.txt", text replace
display "`tag'"

**# data prep ------------------------------------------------------------------------------------

* Load the estimation data and print quick diagnostics on the key variables.
use "data_clean\dst-data04_for_estimation_within_250_miles", clear
summarize dist_to_border
tabulate wave, missing
describe mental_hlth_distress
codebook TractFIPS

*## (1) Build the 9 latitude-by-border cells, following page 215 of:
*## Giuntella, O. and F. Mazzonna (2019). "Sunset Time and the Economic Effects of Social Jetlag: Evidence from US Time Zone Borders." Journal of Health Economics 65: 210-226.

assert !missing(centroid_lat)
assert !missing(region)

* Latitude band of each tract: 0 = above 40, 1 = in (34, 40], 2 = at or below 34.
tempvar band
generate byte `band' = cond(centroid_lat > 40, 0, cond(centroid_lat > 34, 1, 2))

* Cells 1-3, 4-6 and 7-9 run north to south within each time-zone border.
* Any region value other than the three listed here leaves cell missing.
generate cell = 1 + `band' if region == "eastern and central"
replace  cell = 4 + `band' if region == "central and mountain"
replace  cell = 7 + `band' if region == "mountain and pacific"
drop `band'

* Indicator variables cell_1 ... and wave_1 ..., one per distinct value.
quietly tabulate cell, generate(cell_)
quietly tabulate wave, generate(wave_)

* Sanity check: latitude range covered by each band (north, middle, south).
forvalues b = 1/3 {
    summarize centroid_lat if inlist(cell, `b', `b' + 3, `b' + 6)
}

*## (2) Control variables, stored in the global macro x for reuse below.

global x "pop white_pct black_pct hispanic_pct pop_under_18yr_pct pop_65yr_over_pct age_median educ_hs_pct educ_coll_pct married_pct hh_size hh_income_median home_value_median no_health_ins_pct unemploy_pct"

describe dist_to_border dist_to_border_signed $x centroid_lat daylight_dur_max

**# estimation prep -----------------------------------------------------------------------

* Numeric county identifier built from the string variable CountyFIPS.
encode CountyFIPS, generate(county_fips)

* nomiss counts, per observation, how many of the listed variables are missing (0 = complete case).
egen nomiss = rowmiss($x dist_to_border centroid_lat daylight_dur_max)

assert !missing(StateAbbr)

* Estimation sample flag (1/0): complete cases, outside Arizona, and no farther from the
* border than the radius below. A missing distance compares as larger than any number,
* so it fails the distance test as well.
* NOTE(review): the radius is presumably in miles, judging by the data file name - confirm.
local radius 50
generate sample_selection = (dist_to_border <= `radius' & StateAbbr != "AZ" & nomiss == 0)

**# figure --------------------------------------------------------------------------------

* Panel "all": binned means of mental distress against signed distance to the border,
* pooling every observation in the estimation sample.
* rdplot with the genvars option adds rdplot-prefixed variables to the data; the plot
* below is rebuilt by hand from those variables (hide suppresses rdplot's own graph).
* NOTE(review): the de-duplication step presumably leaves one row per bin because, with
* p(0), every rdplot variable is constant within a bin - confirm against the rdplot help.
preserve
rdplot mental_hlth_distress dist_to_border_signed if sample_selection == 1, ///
       c(0) nbins(15 15) p(0) kernel(uni) ci(95) hide genvars
keep rdplot*
duplicates drop
drop if missing(rdplot_id)      // drop the all-missing row left by observations outside the rdplot sample
sort rdplot_id
summarize
* The x-axis range follows the local radius used for sample selection, so changing the
* radius in one place keeps the axis labels in step with the plotted window.
twoway (rcap rdplot_ci_l rdplot_ci_r rdplot_mean_x, sort) (scatter rdplot_mean_y rdplot_mean_x, sort mcolor(blue)), ///
       xline(0, lpattern(dash) lwidth(*1) lcolor(gray)) ///
       xlabel(-`radius'(10)`radius') ///
       xtitle("distance < 0 for the control group; distance > 0 for the treatment group", size(*0.8)) ///
       legend(label(1 "the 95% confidence interval") label(2 "the binned sample mean") order(2 1) rows(1) size(*0.8) position(6)) ///
       title("All time zones in the Contiguous United States", size(*0.9) span) ///
       name(all, replace) scheme(stcolor)
restore

* Panel "et_ct": same binned-means plot, restricted to observations whose region is
* "eastern and central".
* rdplot with the genvars option adds rdplot-prefixed variables to the data; the plot
* below is rebuilt by hand from those variables (hide suppresses rdplot's own graph).
* NOTE(review): the de-duplication step presumably leaves one row per bin because, with
* p(0), every rdplot variable is constant within a bin - confirm against the rdplot help.
preserve
rdplot mental_hlth_distress dist_to_border_signed if sample_selection == 1 & region == "eastern and central", ///
       c(0) nbins(15 15) p(0) kernel(uni) ci(95) hide genvars
keep rdplot*
duplicates drop
drop if missing(rdplot_id)      // drop the all-missing row left by observations outside the rdplot sample
sort rdplot_id
summarize
* The x-axis range follows the local radius used for sample selection, so changing the
* radius in one place keeps the axis labels in step with the plotted window.
twoway (rcap rdplot_ci_l rdplot_ci_r rdplot_mean_x, sort) (scatter rdplot_mean_y rdplot_mean_x, sort mcolor(blue)), ///
       xline(0, lpattern(dash) lwidth(*1) lcolor(gray)) ///
       xlabel(-`radius'(10)`radius') ///
       xtitle("distance < 0 for the control group; distance > 0 for the treatment group", size(*0.8)) ///
       legend(label(1 "the 95% confidence interval") label(2 "the binned sample mean") order(2 1) rows(1) size(*0.8) position(6)) ///
       title("Eastern Time Zone and Central Time Zone", size(*0.9) span) ///
       name(et_ct, replace) scheme(stcolor)
restore

* Put the two named panels side by side on a common y scale and add the figure-level titles.
graph combine all et_ct, ///
      iscale(*0.8) ycommon ///
      title("Panel B: Dependent variable is mental distress", size(*0.8) span) ///
      subtitle("The dependent variable is defined as the prevalence of adults aged 18 years or older who report 14 or more days during the past 30 days during which their mental health was not good." ///
               "The prevalence is at the census tract level, measured annually, and measured in 0–100 percentage points.", size(*0.5) span) ///
      scheme(stcolor)

* Write the combined figure next to the log. Forward slashes work as directory
* separators in Stata on every operating system and need no escaping before a macro.
graph save   "code/analysis/figures/`pgm'", replace
graph export "code/analysis/figures/`pgm'.png", replace

* EMF output is supported only by Stata for Windows; elsewhere the export would stop
* the do-file with an error before the log is closed, so skip it with a note instead.
if "`c(os)'" == "Windows" {
    graph export "code/analysis/figures/`pgm'.emf", replace
}
else {
    display as text "note: .emf export skipped (EMF is available only in Stata for Windows)"
}

log close
exit